-
Notifications
You must be signed in to change notification settings - Fork 46
MTLSharedEvent for synchronization take 2
#690
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
Your PR requires formatting changes to meet the project's style guidelines. Click here to view the suggested changes.diff --git a/lib/mtl/capture.jl b/lib/mtl/capture.jl
index 7d28793f..9f29bdd3 100644
--- a/lib/mtl/capture.jl
+++ b/lib/mtl/capture.jl
@@ -12,13 +12,13 @@ Use [`beginScope()`](@ref) and [`endScope()`](@ref) to set the boundaries for a
"""
MTLCaptureScope
-function MTLCaptureScope(queue::MTLDevice, manager=MTLCaptureManager())
+function MTLCaptureScope(queue::MTLDevice, manager = MTLCaptureManager())
handle = @objc [manager::id{MTLCaptureManager} newCaptureScopeWithDevice:queue::id{MTLDevice}]::id{MTLCaptureScope}
- MTLCaptureScope(handle)
+ return MTLCaptureScope(handle)
end
-function MTLCaptureScope(queue::MTLCommandQueue, manager=MTLCaptureManager())
+function MTLCaptureScope(queue::MTLCommandQueue, manager = MTLCaptureManager())
handle = @objc [manager::id{MTLCaptureManager} newCaptureScopeWithCommandQueue:queue::id{MTLCommandQueue}]::id{MTLCaptureScope}
- MTLCaptureScope(handle)
+ return MTLCaptureScope(handle)
end
# @objcwrapper MTLCaptureScope <: NSObject
@@ -68,7 +68,8 @@ function MTLCaptureDescriptor()
end
# TODO: Add capture state
-function MTLCaptureDescriptor(obj::Union{MTLDevice, MTLCommandQueue, MTLCaptureScope},
+function MTLCaptureDescriptor(
+ obj::Union{MTLDevice, MTLCommandQueue, MTLCaptureScope},
destination::MTLCaptureDestination;
folder::String=nothing)
desc = MTLCaptureDescriptor()
@@ -119,7 +120,8 @@ end
Start GPU frame capture using the default capture object and specifying capture descriptor parameters directly.
"""
-function startCapture(obj::Union{MTLDevice, MTLCommandQueue, MTLCaptureScope},
+function startCapture(
+ obj::Union{MTLDevice, MTLCommandQueue, MTLCaptureScope},
destination::MTLCaptureDestination=MTLCaptureDestinationGPUTraceDocument;
folder::String=nothing)
if destination == MTLCaptureDestinationGPUTraceDocument && folder === nothing
diff --git a/lib/mtl/events.jl b/lib/mtl/events.jl
index 374f4285..8d2345ed 100644
--- a/lib/mtl/events.jl
+++ b/lib/mtl/events.jl
@@ -29,9 +29,11 @@ function MTLSharedEvent(dev::MTLDevice)
return obj
end
-function waitUntilSignaledValue(ev::MTLSharedEvent, value, timeoutMS=typemax(UInt64))
- @objc [ev::id{MTLSharedEvent} waitUntilSignaledValue:value::UInt64
- timeoutMS:timeoutMS::UInt64]::Bool
+function waitUntilSignaledValue(ev::MTLSharedEvent, value, timeoutMS = typemax(UInt64))
+ return @objc [
+ ev::id{MTLSharedEvent} waitUntilSignaledValue:value::UInt64
+ timeoutMS:timeoutMS::UInt64
+ ]::Bool
end
## shared event handle
diff --git a/src/state.jl b/src/state.jl
index ffa5f53d..918731c4 100644
--- a/src/state.jl
+++ b/src/state.jl
@@ -61,7 +61,7 @@ end
Return the `MTLSharedEvent` used to synchronize a queue
"""
function queue_event(queue::MTLCommandQueue)
- get!(task_local_storage(), (:MTLSharedEvent, queue)) do
+ return get!(task_local_storage(), (:MTLSharedEvent, queue)) do
MTLSharedEvent(queue.device)
end::MTLSharedEvent
end
diff --git a/test/capturing.jl b/test/capturing.jl
index 9b1da7f5..7dee3cae 100644
--- a/test/capturing.jl
+++ b/test/capturing.jl
@@ -36,9 +36,9 @@ desc.captureObject = dev
desc.destination = MTL.MTLCaptureDestinationGPUTraceDocument
@test desc.destination == MTL.MTLCaptureDestinationGPUTraceDocument
-# Capture Manager supports destination
-@test !supports_destination(manager, MTL.MTLCaptureDestinationDeveloperTools)
-@test supports_destination(manager, MTL.MTLCaptureDestinationGPUTraceDocument)
+ # Capture Manager supports destination
+ @test !supports_destination(manager, MTL.MTLCaptureDestinationDeveloperTools)
+ @test supports_destination(manager, MTL.MTLCaptureDestinationGPUTraceDocument)
# Output URL
@test desc.outputURL === nothing
@@ -69,7 +69,7 @@ bufferA = MtlArray{Float32,1,SharedStorage}(undef, tuple(4))
@test manager.isCapturing == false
startCapture(manager, desc)
@test manager.isCapturing
-@test_throws "Capture manager is already capturing." startCapture(manager, desc)
+ @test_throws "Capture manager is already capturing." startCapture(manager, desc)
Metal.@sync @metal threads=4 tester(bufferA)
stopCapture(manager)
@test manager.isCapturing == false |
f695de3 to
7804676
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Metal Benchmarks
| Benchmark suite | Current: 04e52b9 | Previous: 822abde | Ratio |
|---|---|---|---|
latency/precompile |
25029149041 ns |
24961495792 ns |
1.00 |
latency/ttfp |
2129357833 ns |
2126570334 ns |
1.00 |
latency/import |
1223676125 ns |
1221863250 ns |
1.00 |
integration/metaldevrt |
836250 ns |
920646 ns |
0.91 |
integration/byval/slices=1 |
1544333 ns |
1636875 ns |
0.94 |
integration/byval/slices=3 |
9111916 ns |
8533229 ns |
1.07 |
integration/byval/reference |
1533291.5 ns |
1625125 ns |
0.94 |
integration/byval/slices=2 |
2568958 ns |
2708917 ns |
0.95 |
kernel/indexing |
568167 ns |
674520.5 ns |
0.84 |
kernel/indexing_checked |
572750 ns |
692979.5 ns |
0.83 |
kernel/launch |
12084 ns |
12292 ns |
0.98 |
kernel/rand |
552417 ns |
623458 ns |
0.89 |
array/construct |
6291 ns |
6250 ns |
1.01 |
array/broadcast |
593666.5 ns |
667000 ns |
0.89 |
array/random/randn/Float32 |
794125 ns |
822375 ns |
0.97 |
array/random/randn!/Float32 |
605666.5 ns |
627417 ns |
0.97 |
array/random/rand!/Int64 |
549958 ns |
555042 ns |
0.99 |
array/random/rand!/Float32 |
464000 ns |
588291 ns |
0.79 |
array/random/rand/Int64 |
774208 ns |
806208 ns |
0.96 |
array/random/rand/Float32 |
645208 ns |
649083 ns |
0.99 |
array/accumulate/Int64/1d |
1255834 ns |
1351708 ns |
0.93 |
array/accumulate/Int64/dims=1 |
1815625 ns |
1895167 ns |
0.96 |
array/accumulate/Int64/dims=2 |
2141166.5 ns |
2224583 ns |
0.96 |
array/accumulate/Int64/dims=1L |
11691792 ns |
11807458 ns |
0.99 |
array/accumulate/Int64/dims=2L |
9648791.5 ns |
9845792 ns |
0.98 |
array/accumulate/Float32/1d |
1100917 ns |
1211020.5 ns |
0.91 |
array/accumulate/Float32/dims=1 |
1542875 ns |
1623125 ns |
0.95 |
array/accumulate/Float32/dims=2 |
1858625 ns |
1913375 ns |
0.97 |
array/accumulate/Float32/dims=1L |
9777021 ns |
10039875 ns |
0.97 |
array/accumulate/Float32/dims=2L |
7226166 ns |
7298229.5 ns |
0.99 |
array/reductions/reduce/Int64/1d |
1280729.5 ns |
1341854 ns |
0.95 |
array/reductions/reduce/Int64/dims=1 |
1078083 ns |
1141520.5 ns |
0.94 |
array/reductions/reduce/Int64/dims=2 |
1176666 ns |
1277145.5 ns |
0.92 |
array/reductions/reduce/Int64/dims=1L |
2020708 ns |
2034687.5 ns |
0.99 |
array/reductions/reduce/Int64/dims=2L |
3404792 ns |
3549500 ns |
0.96 |
array/reductions/reduce/Float32/1d |
964958 ns |
1067125 ns |
0.90 |
array/reductions/reduce/Float32/dims=1 |
815000 ns |
883229.5 ns |
0.92 |
array/reductions/reduce/Float32/dims=2 |
751083 ns |
810979.5 ns |
0.93 |
array/reductions/reduce/Float32/dims=1L |
1330021 ns |
1373208.5 ns |
0.97 |
array/reductions/reduce/Float32/dims=2L |
1755500 ns |
1888250 ns |
0.93 |
array/reductions/mapreduce/Int64/1d |
1310875 ns |
1375438 ns |
0.95 |
array/reductions/mapreduce/Int64/dims=1 |
1079792 ns |
1176500 ns |
0.92 |
array/reductions/mapreduce/Int64/dims=2 |
1189562 ns |
1285062.5 ns |
0.93 |
array/reductions/mapreduce/Int64/dims=1L |
1904562.5 ns |
2087041 ns |
0.91 |
array/reductions/mapreduce/Int64/dims=2L |
3304000 ns |
3445167 ns |
0.96 |
array/reductions/mapreduce/Float32/1d |
985334 ns |
1065917 ns |
0.92 |
array/reductions/mapreduce/Float32/dims=1 |
802875 ns |
892125 ns |
0.90 |
array/reductions/mapreduce/Float32/dims=2 |
758625 ns |
809583 ns |
0.94 |
array/reductions/mapreduce/Float32/dims=1L |
1331812.5 ns |
1359000 ns |
0.98 |
array/reductions/mapreduce/Float32/dims=2L |
1775792 ns |
1898271 ns |
0.94 |
array/private/copyto!/gpu_to_gpu |
634333 ns |
654166 ns |
0.97 |
array/private/copyto!/cpu_to_gpu |
802708 ns |
826625 ns |
0.97 |
array/private/copyto!/gpu_to_cpu |
784458 ns |
813104.5 ns |
0.96 |
array/private/iteration/findall/int |
1571125 ns |
1668417 ns |
0.94 |
array/private/iteration/findall/bool |
1425333.5 ns |
1504500 ns |
0.95 |
array/private/iteration/findfirst/int |
1800541.5 ns |
1934521 ns |
0.93 |
array/private/iteration/findfirst/bool |
1652708 ns |
1855687 ns |
0.89 |
array/private/iteration/scalar |
4217125 ns |
4774709 ns |
0.88 |
array/private/iteration/logical |
2525375 ns |
2574750 ns |
0.98 |
array/private/iteration/findmin/1d |
1875812.5 ns |
2064500 ns |
0.91 |
array/private/iteration/findmin/2d |
1518667 ns |
1624500 ns |
0.93 |
array/private/copy |
568292 ns |
550458 ns |
1.03 |
array/shared/copyto!/gpu_to_gpu |
85125 ns |
83333 ns |
1.02 |
array/shared/copyto!/cpu_to_gpu |
82459 ns |
94500 ns |
0.87 |
array/shared/copyto!/gpu_to_cpu |
82167 ns |
82958 ns |
0.99 |
array/shared/iteration/findall/int |
1576021 ns |
1643125 ns |
0.96 |
array/shared/iteration/findall/bool |
1426479 ns |
1516125 ns |
0.94 |
array/shared/iteration/findfirst/int |
1325084 ns |
1457083 ns |
0.91 |
array/shared/iteration/findfirst/bool |
1308916.5 ns |
1419167 ns |
0.92 |
array/shared/iteration/scalar |
210584 ns |
159229.5 ns |
1.32 |
array/shared/iteration/logical |
2399542 ns |
2361416 ns |
1.02 |
array/shared/iteration/findmin/1d |
1392937.5 ns |
1620625 ns |
0.86 |
array/shared/iteration/findmin/2d |
1518708 ns |
1622208 ns |
0.94 |
array/shared/copy |
253458.5 ns |
245750 ns |
1.03 |
array/permutedims/4d |
2343333.5 ns |
2439396 ns |
0.96 |
array/permutedims/2d |
1146875 ns |
1226959 ns |
0.93 |
array/permutedims/3d |
1666333.5 ns |
1746270.5 ns |
0.95 |
metal/synchronization/stream |
18833 ns |
14833 ns |
1.27 |
metal/synchronization/context |
20042 ns |
15334 ns |
1.31 |
This comment was automatically generated by workflow using github-action-benchmark.
Codecov Report❌ Patch coverage is
Additional details and impacted files@@ Coverage Diff @@
## main #690 +/- ##
==========================================
+ Coverage 80.89% 81.20% +0.31%
==========================================
Files 62 62
Lines 2790 2810 +20
==========================================
+ Hits 2257 2282 +25
+ Misses 533 528 -5 ☔ View full report in Codecov by Sentry. 🚀 New features to boost your workflow:
|
63af0ed to
2a7c978
Compare
|
I suspect The current spinlock is a potential option. Another is maybe ensuring we synchronize after @maleadt any thoughts? |
Seems like the cleaner solution, no? |
|
Unfortunately synchronizing doesn't work. With 14d3d0e locally, when it fails, it usually takes ~90 ms for capture to stop. Should I just use a spinlock? |
|
Yeah, that seems reasonable. Do add some code comments about the reason though. |
- Default to capturing with MTLCaptureScope - Test improvments - Spinlock until capture is over in `stopCapture`
Add `waitUntilSignaledValue`
14d3d0e to
04e52b9
Compare
Not ready to merge as I haven't changed anything since #645 but leaving it up.
I'm considering maybe making it an experimental local preference that's off by default and then adding a special CI to try and catch failures.